Let’s see what the maps have to say
The objective of this take home exercise is to understand the GeoVisualization using tmaps and answer some of the questions presented in the VAST Challenge 2022
packages = c('rmarkdown','sf','tmap','tidyverse','clock','lubridate','sftime','lwgeom')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
Here we are reading the files in two formats- sf and csv. This is done so that functions such as inner join can be performed and also the data can be used to create tmaps.
schoolscsv <- read_csv("data/Schools.csv")
pubcsv<-read_csv("data/Pubs.csv")
apartmentscsv<-read_csv("data/Apartments.csv")
buildingscsv<-read_csv("data/buildings.csv")
employerscsv<-read_csv("data/Employers.csv")
restaurantcsv<-read_csv("data/Restaurants.csv")
schools <- read_sf("data/Schools.csv",
options = "GEOM_POSSIBLE_NAMES=location")
pubs<-read_sf("data/Pubs.csv",
options = "GEOM_POSSIBLE_NAMES=location")
apartments<-read_sf("data/Apartments.csv",
options = "GEOM_POSSIBLE_NAMES=location")
buildings<-read_sf("data/buildings.csv",
options = "GEOM_POSSIBLE_NAMES=location")
employers<-read_sf("data/Employers.csv",
options = "GEOM_POSSIBLE_NAMES=location")
restaurant<-read_sf("data/Restaurants.csv",
options = "GEOM_POSSIBLE_NAMES=location")
Reading the status logs
logs<-readRDS("data/logs_fread.rds")
To calculate the participants at restaurants, places of work, pubs, we are performing an inner join to get the exact location of the participant at a given instance.
Let us also go through the Participants file for us to revisit the age groups and educational background.
Participants<-read_csv("data/Participants.csv")
ParticipantsAgeRegrouped<-Participants%>%mutate(agegroup=case_when(age<30~"Below 30",
age>=30 &age<40~"30-39",
age>=40 &age<50~"40-49",
age>=50 ~"50 and above"))
Before we continue with the data analysis, it is necessary for us to check for biasing. Our objective here is to figure out if the data provided to us has any kind of Sampling bias. To verify this, we can look at the residential status of the participants.
Any sampling detected (for example- majority of the participants are residing in one side of the city) may affect our observations.
For this, we will use the logs to find out the count of participants in the apartments and map it with the corresponding building file.
PartatApartments<-logs%>%
distinct(apartmentId,participantId)%>%
group_by(apartmentId)%>%
tally()%>%
mutate(apartmentId=as.character(apartmentId))
apartmentdetails<-left_join(apartmentscsv%>%
mutate(apartmentId=as.character(apartmentId)),
PartatApartments,
by=c("apartmentId"="apartmentId"))
buildingdetails<-apartmentdetails%>%
group_by(buildingId)%>%
summarise(residents=sum(n))
write_csv(buildingdetails,"data/buildingdetails.csv")
ReadingBuildingDetails<-read_csv("data/buildingdetails.csv")
buildingoccupancy<-left_join(buildings,
ReadingBuildingDetails%>%
mutate(buildingId=as.character(buildingId)),
by=c("buildingId"="buildingId"))
tmap_mode("plot")
tm_shape(buildingoccupancy)+
tm_polygons(col = "residents",
border.col = "grey",
style="cont",
palette = "Blues",
border.lwd = 1,
border.alpha = 0.5,
colorNA = "white")+
tm_layout(frame=F,
main.title = "Do we see any discrmination?",
main.title.size = 2,
legend.position = c("right","top"),
legend.height = 0.2,
legend.width = 0.2)+
tm_compass()+
tm_credits("Based on the sample of 1000 participants",
position = c("left","bottom"))
Now that we can confirm that the data is quite uniform, we can begin with our analysis answering the following questions:-
Here we are going to study the commercially happening place of the city.Assuming that the working hours are 8 am - 8 pm
QuantityOfPoints<-EmployeesAtWork%>%filter(AtWork=="Yes")%>%
group_by(currentLocation,participantId)%>%
tally()%>%
mutate(participantId=as.character(participantId))
EmployeesEducationLevel<-left_join(QuantityOfPoints,
ParticipantsAgeRegrouped%>%
mutate(
participantId=as.character(participantId)),
by=c("participantId"="participantId"))
QuantityOfEmployeesEduLevel<-EmployeesEducationLevel%>%
group_by(currentLocation,educationLevel)%>%
tally()
write_csv(QuantityOfEmployeesEduLevel,"data/PartAtEmployers.csv")
z_sf<-read_sf("data/PartAtEmployers.csv",options = "GEOM_POSSIBLE_NAMES=currentLocation")
tmap_mode("plot")
tm_shape(buildings)+
tm_polygons(col = "white",
border.col = "grey",
border.lwd = 1) +
tm_shape(z_sf%>%mutate(n=as.numeric(n)))+
tm_dots(col="educationLevel",
size="n",
palette="Set1")+
tm_layout(main.title = "Where is the work?*",
main.title.size = 2,
legend.height = 0.3,
legend.width = 0.3,
legend.outside = FALSE,
legend.position = c("right", "top"),
frame = FALSE)+
tm_compass()+
tm_credits("*Observed from Mon-Fri at 8 am-8 pm",
position=c("left", "bottom"))
People like to dine out and enjoy meals. We can study and figure out what is footfall during the weekday and weekend.
PartatRestaurants_updated<-PartatRestaurants%>%
mutate(StartTime=format(timestamp,"%H:%M:%S"),
weekday=weekdays(timestamp))%>%
group_by(weekday,restaurantId,currentLocation,foodCost)%>%
tally()%>%
mutate(Day=case_when(weekday != "Saturday" & weekday != "Sunday"~"Weekday",
weekday=="Saturday" | weekday=="Sunday"~"Weekend"))
write_csv(PartatRestaurants_updated,"data/w.csv")
PartatRestaurants_updatedSF<-read_sf("data/w.csv",options = "GEOM_POSSIBLE_NAMES=currentLocation")
tmap_mode("view")
tm_shape(buildings)+
tm_polygons(col = "white",
border.col = "grey",
border.lwd = 1) +
tm_shape(PartatRestaurants_updatedSF%>%
select(-weekday)%>%
mutate(n=as.numeric(n),
restaurantId=as.character(restaurantId),
foodCost=as.numeric(foodCost)))+
tm_bubbles("n",col="foodCost",popup.vars=c("restaurantId"))+
tm_facets(c("Day"),nrow=1,ncol=2, sync = TRUE)
recreationalplaces<-logs%>%
filter(currentMode=="AtRecreation")%>%
distinct(currentLocation,participantId)
write_csv(recreationalplaces,"data/PartRecreational.csv")
R<-read_sf("data/PartRecreational.csv",options = "GEOM_POSSIBLE_NAMES=currentLocation")
A<-R%>%distinct(currentLocation)
l<-R%>%
group_by(participantId)%>%tally()%>%
filter(n>=2)%>%st_cast("LINESTRING")
PartAgeGroup<-left_join(l,ParticipantsAgeRegrouped%>%mutate(participantId=as.character(participantId)),
by=c("participantId"="participantId"))
tmap_mode("view")
tm_shape(buildings)+
tm_polygons(col = "buildingType",
border.col = "grey",
border.lwd = 1)+
tm_shape(PartAgeGroup)+
tm_lines(col="agegroup",
palette="Set2")+
tm_shape(R)+
tm_dots()
Now that we have introduced traffic, let us understand the traffic conditions in the city and how it can be addressed.
Traffic<-logs%>%filter(currentMode=="Transport")%>%
mutate(StartTime=format(timestamp,"%H:%M:%S"),
weekday=weekdays(timestamp),
TimeOfDay=case_when(
StartTime>="08:00:00" & StartTime<="09:00:00"~"Morning",
StartTime>="18:00:00" & StartTime<="19:00:00"~"Evening",
))%>%group_by(TimeOfDay,currentLocation)%>%tally()
write_csv(Traffic,"data/trafficdata.csv")
ReadingTrafficData<-read_sf("data/trafficdata.csv",options = "GEOM_POSSIBLE_NAMES=currentLocation")
MorningTrafficData_updated<-ReadingTrafficData%>%mutate(n=as.numeric(n))%>%
filter(TimeOfDay=="Morning")
EveningTrafficData_updated<-ReadingTrafficData%>%mutate(n=as.numeric(n))%>%
filter(TimeOfDay=="Evening")
Since we are dealing with density, we will create a box map
Creating a function which helps us determine the break points.
boxbreaks <- function(v,mult=1.5) {
qv <- unname(quantile(v[[1]]))
iqr <- qv[4] - qv[2]
upfence <- qv[4] + mult * iqr
lofence <- qv[2] - mult * iqr
# initialize break points vector
bb <- vector(mode="numeric",length=7)
# logic for lower and upper fences
if (lofence < qv[1]) { # no lower outliers
bb[1] <- lofence
bb[2] <- floor(qv[1])
} else {
bb[2] <- lofence
bb[1] <- qv[1]
}
if (upfence > qv[5]) { # no upper outliers
bb[7] <- upfence
bb[6] <- ceiling(qv[5])
} else {
bb[6] <- upfence
bb[7] <- qv[5]
}
bb[3:5] <- qv[2:4]
return(bb)
}
get.var <- function(vname,df) {
v <- df[vname] %>% st_set_geometry(NULL)
v <- unname(v[,1])
return(v)
}
breakss <- function(vnam, df,
legtitle=NA,
mtitle="Box Map",
mult=1.5){
var <- get.var(vnam,df)
bbb <- boxbreaks(var)
return(bbb)
}
Plotting the map
tmap_mode("plot")
MorningTraffic<-tm_shape(buildings)+
tm_polygons(col = "buildingType",
border.col = "grey",
border.lwd = 1) +
tm_shape(MorningTrafficData_updated)+
tm_dots(size=0.05,col="n",
border.lwd=NA,
breaks=breakss("n",MorningTrafficData_updated),
palette="Reds",
labels = c("lower outlier",
"< 25%",
"25% - 50%",
"50% - 75%",
"> 75%",
"upper outlier"))+
tm_layout(main.title = "Morning Rush",
main.title.size = 2,
legend.height = 0.2,
legend.width = 0.2,
legend.outside = FALSE,
legend.position = c("right", "top"),
frame = FALSE)+
tm_credits("Morning peak: 8 am-9 am",
position=c("left","bottom"),
size=0.5)
EveningTraffic<-tm_shape(buildings)+
tm_polygons(col = "buildingType",
border.col = "grey",
border.lwd = 1) +
tm_shape(EveningTrafficData_updated)+
tm_dots(size=0.05,col="n",
border.lwd=NA,
breaks=breakss("n",EveningTrafficData_updated),
palette="Reds",
labels = c("lower outlier",
"< 25%",
"25% - 50%",
"50% - 75%",
"> 75%",
"upper outlier"))+
tm_layout(main.title = "Evening Rush",
main.title.size =2,
legend.height = 0.2,
legend.width = 0.2,
legend.outside = FALSE,
legend.position = c("right", "top"),
frame = FALSE)+
tm_credits("Evening peak: 6 pm-7 pm",
position=c("left","bottom"),
size=0.5)+
tm_compass()
tmap_arrange(MorningTraffic,EveningTraffic,outer.margins = 0.02)